.libPaths(c("/samurlab1/Joshua/more_Rlib/", .libPaths()))
# Mature Neutrophil differential analysis 
# will compare the 3 mature neutrophil clusters in single cell RNA-seq dataset 
# replot in low dim space using HVF 
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
## 
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
## 
##     intersect, t
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(clusterProfiler)
## 
## clusterProfiler v4.12.6 Learn more at https://yulab-smu.top/contribution-knowledge-mining/
## 
## Please cite:
## 
## S Xu, E Hu, Y Cai, Z Xie, X Luo, L Zhan, W Tang, Q Wang, B Liu, R Wang,
## W Xie, T Wu, L Xie, G Yu. Using clusterProfiler to characterize
## multiomics data. Nature Protocols. 2024, doi:10.1038/s41596-024-01020-z
## 
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:stats':
## 
##     filter
library(org.Hs.eg.db) 
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following object is masked from 'package:SeuratObject':
## 
##     intersect
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
##     tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:clusterProfiler':
## 
##     rename
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:clusterProfiler':
## 
##     slice
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:sp':
## 
##     %over%
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:clusterProfiler':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## 
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.1     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%()     masks IRanges::%within%()
## ✖ IRanges::collapse()       masks dplyr::collapse()
## ✖ Biobase::combine()        masks BiocGenerics::combine(), dplyr::combine()
## ✖ IRanges::desc()           masks dplyr::desc()
## ✖ tidyr::expand()           masks S4Vectors::expand()
## ✖ clusterProfiler::filter() masks dplyr::filter(), stats::filter()
## ✖ S4Vectors::first()        masks dplyr::first()
## ✖ dplyr::lag()              masks stats::lag()
## ✖ ggplot2::Position()       masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()           masks IRanges::reduce()
## ✖ S4Vectors::rename()       masks clusterProfiler::rename(), dplyr::rename()
## ✖ lubridate::second()       masks S4Vectors::second()
## ✖ lubridate::second<-()     masks S4Vectors::second<-()
## ✖ AnnotationDbi::select()   masks clusterProfiler::select(), dplyr::select()
## ✖ purrr::simplify()         masks clusterProfiler::simplify()
## ✖ IRanges::slice()          masks clusterProfiler::slice(), dplyr::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(circlize)
## ========================================
## circlize version 0.4.16
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## 
## The following object is masked from 'package:IRanges':
## 
##     slice
## 
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(monocle3)
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## 
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## 
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## 
## Attaching package: 'MatrixGenerics'
## 
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## 
## The following object is masked from 'package:Biobase':
## 
##     rowMedians
## 
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## 
## Attaching package: 'SummarizedExperiment'
## 
## The following object is masked from 'package:Seurat':
## 
##     Assays
## 
## The following object is masked from 'package:SeuratObject':
## 
##     Assays
## 
## 
## Attaching package: 'monocle3'
## 
## The following objects are masked from 'package:Biobase':
## 
##     exprs, fData, fData<-, pData, pData<-
library(SeuratWrappers)
library(ggplot2)

Pre-Neutrophil scoring. We must convert the list of pre-neutrophil signature from mouse transcripts to huma using this script below.

#neutrophil single cell trajectory analysis
preneu.feats.mus = read.delim(file = "/samurlab1/Joshua/MM_scRNAseq/preneugenes.txt", sep ="")
preneu.feats.mus = preneu.feats.mus$x

#currently, features are in mouse symbol annotation, if we use as is it will coerce an error
#this function uses ensemble annotated features to convert gene names. 
musGenes = preneu.feats.mus

mouse_human_genes = read.csv("http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt",sep="\t")

convert_mouse_to_human <- function(gene_list){
  
  output = c()
  
  for(gene in gene_list){
    class_key = (mouse_human_genes %>% filter(Symbol == gene & Common.Organism.Name=="mouse, laboratory"))[['DB.Class.Key']]
    if(!identical(class_key, integer(0)) ){
      human_genes = (mouse_human_genes %>% filter(DB.Class.Key == class_key & Common.Organism.Name=="human"))[,"Symbol"]
      for(human_gene in human_genes){
        output = append(output,human_gene)
      }
    }
  }
  
  return (output)
}

preneu.homo = convert_mouse_to_human(as.vector(preneu.feats.mus))

neuts = AddModuleScore(neuts, features = list(preneu.homo), name = "preneu.sig")
## Warning: The following features are not present in the object: F10, ELDR,
## HSPB6, XKR5, not searching for symbol synonyms
FeaturePlot(neuts, features = "preneu.sig1", cols = c("steelblue1","firebrick3"), pt.size  = 0.1)

FeaturePlot(neuts, features = "preneu.sig1", split.by="tissue" ,cols = c("steelblue1","firebrick3"), pt.size  = 0.1)

## selecting .1 as a stringent cut off for origin local in minimum spanning tree

neuts@reductions$umap@cell.embeddings = neuts@reductions$umap@cell.embeddings[,c(1,2)]
neuts$predicted = NULL
## Warning: Cannot find cell-level meta data named predicted
hist(neuts$preneu.sig1)

neuts$stem = neuts$preneu.sig1 > .1
# selecting .1 as a stringent cut off for origin local in minimum spanning tree
stemcells = colnames(neuts[,neuts$stem == TRUE])

####monocle3 for sc trajectories####
neuts.cds = as.cell_data_set(neuts)
## Warning: Monocle 3 trajectories require cluster partitions, which Seurat does
## not calculate. Please run 'cluster_cells' on your cell_data_set object
neuts.cds@rowRanges@elementMetadata@listData[["gene_short_name"]] = rownames(neuts.cds[["SCT"]])
neuts.cds = preprocess_cds(neuts.cds, num_dim = 50)
neuts.cds = cluster_cells(neuts.cds)
neuts.cds = learn_graph(neuts.cds)
##   |                                                                              |                                                                      |   0%  |                                                                              |======================================================================| 100%
neuts.cds <- order_cells(neuts.cds, root_cells = stemcells)

# neuts.cds_pr_test_res = graph_test(neuts.cds, neighbor_graph="principal_graph", cores=64

plot_cells(neuts.cds,
           color_cells_by = "pseudotime",
           label_groups_by_cluster=FALSE,
           label_leaves=FALSE,
           label_branch_points=FALSE,
           show_trajectory_graph = TRUE, label_principal_points = F, label_roots = T)
## Cells aren't colored in a way that allows them to be grouped.

Supplemental Figure 2C T1, T2, and T3 signature comparison from Science. 2024;383(6679):eadf6493. doi:10.1126/science.adf6493

#T1 signature####
neuts = AddModuleScore(neuts, features =list(c("CXCR2","CD300LD","DUSP1","GBP2","IFITM1","IL1B","ISG15",
                                               "JAML","JUNB","MSRB1","OSM","S100A6","SELPLG","SLPI")),
                       name = "T1_Signature")



t1 = FeaturePlot(neuts, features = "T1_Signature1",
                 # split.by = "tissue",
                 cols = c("steelblue3","red"))
t1

t1 = VlnPlot(neuts, features = "T1_Signature1",
             cols = patient_cols,
             split.plot = T,
             split.by = "tissue", 
             pt.size = 0)
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.
order = c("Mki67+DEFA3+pre-neut","S100A8+LTF+imtr_neut","LTF+CAMP+imtr_neut","S100A8/9+MMP9+imtr_neut","RETN+LCN2+mat_neut","TREM1+MME+mat_neut","TNFAIP3+CXCL8+mat_neut")
factor(t1$data$ident, levels=order)
t1$data$ident = factor(t1$data$ident, levels=order)
t1

#T2 signature####
neuts = AddModuleScore(neuts, features =list(c("LTC4S","MMP8","MMP9","PPIA","PRR13","PTMA","RETNLG",
                                               "TNFAIP3","IRAK3","CYBB","CCRL2","OLR1","CLEC6A")),
                       name = "T2_Signature")
## Warning: The following features are not present in the object: RETNLG, not
## searching for symbol synonyms
t2 = FeaturePlot(neuts, features = "T2_Signature1",
                 # split.by = "tissue",
                 cols = c("steelblue3","red"))
t2

t2 = VlnPlot(neuts, features = "T2_Signature1",
             cols = patient_cols,
             split.plot = T,
             split.by = "tissue", 
             pt.size = 0)

order = c("Mki67+DEFA3+pre-neut","S100A8+LTF+imtr_neut","LTF+CAMP+imtr_neut","S100A8/9+MMP9+imtr_neut","RETN+LCN2+mat_neut","TREM1+MME+mat_neut","TNFAIP3+CXCL8+mat_neut")
factor(t2$data$ident, levels=order)
t2$data$ident = factor(t2$data$ident, levels=order)
t2

#T3 signature####
neuts = AddModuleScore(neuts, features =list(c("ATF3","CCL3","CCL4","CD274","CSTB","CXCL3","HCAR2","HILPDA","HK2",
                                          "HMOX1","IER3","JUN","LDHA","MIF","PLIN2","SPP1","TGIF1","TNFRSF10D",
                                          "VEGFA","ZEB2")), name = "T3_Signature")



t3 = FeaturePlot(neuts, features = "T3_Signature1",
                 # split.by = "tissue",
                 cols = c("steelblue3","red"))
t3

t3 = VlnPlot(neuts, features = "T3_Signature1",
             cols = patient_cols,
             split.plot = T,
             split.by = "tissue", 
             pt.size = 0)

order = c("Mki67+DEFA3+pre-neut","S100A8+LTF+imtr_neut","LTF+CAMP+imtr_neut","S100A8/9+MMP9+imtr_neut","RETN+LCN2+mat_neut","TREM1+MME+mat_neut","TNFAIP3+CXCL8+mat_neut")
factor(t3$data$ident, levels=order)
t3$data$ident = factor(t3$data$ident, levels=order)
t3